import pandas as pd
import plotly.express as px
# Read the water/life dataset and discard, in place, every row that has a
# missing value in any column.
pd_merge = pd.read_csv('datasets/water_life.csv')
pd_merge.dropna(inplace=True)

# Animated scatter of 'Life' against 'Water': one frame per 'Year', with the
# points of the same 'Country Name' linked across frames and coloured per
# country. Both axis ranges are fixed so the view does not rescale per frame.
fig = px.scatter(
    pd_merge,
    x='Water',
    y='Life',
    range_x=[1, 100],
    range_y=[0, 90],
    animation_frame='Year',
    animation_group='Country Name',
    color='Country Name',
    hover_name='Country Name',
    # NOTE(review): no `size=` column is passed, so size_max presumably has
    # no visible effect here -- confirm before relying on it.
    size_max=55,
)
fig.show()
import pandas as pd
import plotly.express as px
# Load the CSV file
data = pd.read_csv('datasets/water_life.csv')
# Coerce 'Water' and 'Life' to numeric *before* dropping rows: entries that
# cannot be parsed become NaN and are then removed by the same dropna() that
# removes values missing from the file. 'Water' is rounded to the nearest
# full percentage in the same step.
# assign() returns a new DataFrame, so no column is ever written into the
# result of dropna() -- that write is what raised SettingWithCopyWarning.
data_clean = data.assign(
    Water=pd.to_numeric(data['Water'], errors='coerce').round(),
    Life=pd.to_numeric(data['Life'], errors='coerce'),
).dropna(subset=['Water', 'Life'])
# Group by year and rounded 'Water' value and take the mean of 'Life' for each group
grouped_data = data_clean.groupby(['Year', 'Water'], as_index=False).agg({'Life': 'mean'})
# Create the line plot using Plotly with a slider for years and draw the line in purple
fig = px.line(
    grouped_data,
    x='Water',
    y='Life',
    animation_frame='Year',
    range_x=[0, 100],  # Fixed x-axis range so frames are comparable
    range_y=[0, 100],  # Fixed y-axis range so frames are comparable
    labels={'Water': 'Access to Water (%)', 'Life': 'Life Expectancy'},
    title='Life Expectancy vs. Access to Water Over the Years',
    color_discrete_sequence=['purple'],
)
# Show the plot
fig.show()
/tmp/ipykernel_249300/1790060808.py:11: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
/tmp/ipykernel_249300/1790060808.py:12: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
/tmp/ipykernel_249300/1790060808.py:15: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy